/*****************************************************************

* Purpose:
*   This do file cleans the raw data to create variables at the
*   census tract level. It uses Census and ACS data downloaded 
*   from Social Explorer and NHGIS. The description of variables 
*   and corresponding tables can be found in the accompanied 
*   data_download excel sheet.
*
* Variables Created (Labels Only):
*   "Census Year"
*   "Metro Code"
*   "Metro Name"
*   "County Identifier"
*   "County Name"
*   "Census Tract Identifier"
*   "Income Bracker Number"
*   "Lower limit of income bracket"
*   "Upper limit of income bracket"
*   "Count of families"
*   "Count of families with kids"
*   "Count of families without kids"
*   "Population, Census Tract"
*   "Households, Census Tract"
*   "College Population, Census Tract"
*   "Total Population, Census Tract"
*   "Mean income of low and high brackets"
*   "Mean family income, census tract"
*   "Median family income, census tract"
*   "Mean family with kids income, census tract"
*   "Mean family without kids income, census tract"
*   "Median value of house, census tract"
*   "Aggregate Gross Rents, Census Tract"
*   "Median Rent, Census Tract"
*   "Number of units under Rent"
*   "Median Mortgage Cost"
*   "Number of units under Mortgage"
*   "Indicator for all family data"
*   "Indicator for families with kids data"
*   "Indicator for education data"
*   "Indicator for kids income data"
*   "Indicator for housing data"
*


** Note: We replace the countrycode for Miami-Dade County, FL from 12025 to 12086  in order to keep the countycode consistent with our crosswalk. 
*****************************************************************/



********************************************************************************
* Creating a single data set for all calculations
********************************************************************************

local year 1980 1990 2000 2010

********************************************************************************
* Import Family Income by Year
********************************************************************************
// Ensure that we aren't appending to an existing file
*cap rm temp_fam_inc.dta
tempfile temp_fam_inc

foreach y of local year{
	clear all
	// This If-Else clause imports the relevant data for each year and renames the
	// variables for each income bracket
	if `y' == 1980 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11723441_SL50.csv", clear
		// Rename bracket variables to fix syntax of reshape command
		renpfix se_t200_0 f_x
		renpfix f_x0 f_x
		
	} 
	else if  `y' == 1990 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11726811_SL140.csv", clear
		// Rename bracket variables to fix syntax of reshape command
		renpfix stf3_p107_0 f_x
		renpfix f_x0 f_x
		
	} 
	else if  `y' == 2000 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11726807_SL140.csv", clear
		// Rename bracket variables to fix syntax of reshape command
		renpfix sf3_p0760 f_x
		renpfix f_x0 f_x
	} 
	else if  `y' == 2010 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11216579_SL140.txt", clear
		local todrop
		foreach var of var * {
			if regexm("`var'","^acs.*s$") local todrop `todrop' `var'
		}
		drop `todrop'
		// Rename bracket variables to fix syntax of reshape command
		renpfix acs12_5yr_b191010 f_x
		renpfix f_x0 f_x
	}
	renpfix f_x g_x
	renpfix g_x f_x
	local nwords :  word count `r(varlist)'
	renpfix geo_		
	// FIPS ID for merging in MSA Crosswalk
	gen countyfips = state*1000 + county

	gen year = `y'
	// Data cleaning and updating so that it matches the 2003 OMB Crosswalk
	drop if state == 72 // get rid of PR because it has a metro repeat with Abilene TX 
*	destring countyfips fips, replace
	replace countyfips = 12086 if countyfips == 12025
	
	// Merge in the crosswalk
	merge m:1 countyfips using "$path\Raw_Data\2003_crosswalk_all_id_bc.dta"

	keep if _merge == 3
	drop _merge

	// Counts per income bracket at the MSA level
	novarabbrev{
		forvalues i =1(1)`nwords' {
			// MSA Level Count
			bysort metro: egen f_count_metro`i' = sum(f_x`i')
			// Tract Level Count
			bysort metro countyfips fips: egen f_count_ct`i' = sum(f_x`i')
			
			gen f_count_metro_tot`i' = f_count_metro1
			
		}
	}

	keep year metro countyfips fips f_count_* 
	duplicates drop 
	reshape long f_count_metro f_count_metro_tot f_count_ct, i(year metro countyfips fips) j(bracket_no)
	drop if bracket_no ==1 
	keep year metro countyfips fips bracket_no f_count_ct

	merge m:1 year bracket_no using "$path\Raw_Data\inc_brackets.dta"
	keep if _merge == 3
	drop _merge
	gen income = (low + high)/2
	
	
	cap append using `temp_fam_inc'
	save `temp_fam_inc', replace


}

********************************************************************************
* Import Median Family Income By Year
********************************************************************************
// Ensure that we aren't appending to an existing file
*cap rm temp_median_income.dta
tempfile temp_median_income

foreach y of local year{
	clear all
	// This If-Else clause imports the relevant data for each year and renames the
	// variables for each income bracket
	if `y' == 1980 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11727882_SL50.txt", clear
		rename se_t055_001 median_income
		
	} 
	else if  `y' == 1990 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11728701_SL140.txt", clear
		rename se_t043_001 median_income
	} 
	else if  `y' == 2000 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11728700_SL140.txt", clear
		rename se_t093_001 median_income

	} 
	else if  `y' == 2010 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11740927_SL140.txt", clear
		rename se_t061_001 median_income
	}

	renpfix geo_		
	// FIPS ID for merging in MSA Crosswalk
	gen countyfips = state*1000 + county

	gen year = `y'
	// Data cleaning and updating so that it matches the 2003 OMB Crosswalk
	drop if state == 72 // get rid of PR because it has a metro repeat with Abilene TX 
*	destring countyfips fips, replace
	replace countyfips = 12086 if countyfips == 12025
	
	// Merge in the crosswalk
	merge m:1 countyfips using "$path\Raw_Data\2003_crosswalk_all_id_bc.dta"
	keep if _merge == 3
	drop _merge
	
	keep year metro countyfips fips median_inc
	cap append using `temp_median_income'
	save `temp_median_income', replace



}

use `temp_fam_inc', clear
merge m:1 year metro countyfips fips using `temp_median_income'
keep if _merge == 3
drop _merge
save `temp_fam_inc', replace
*cap rm temp_median_income.dta

********************************************************************************
* Import Mean Family Income By Year
********************************************************************************
// Ensure that we aren't appending to an existing file
*cap rm temp_mean_income.dta
tempfile temp_mean_income


foreach y of local year{
	clear all
	// This If-Else clause imports the relevant data for each year and renames the
	// variables for each income bracket
	if `y' == 1980 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11731074_SL50.txt", clear
		rename se_t056_001 mean_income
		
		
	} 
	else if  `y' == 1990 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11731081_SL140.txt", clear
		rename se_t048_001 mean_income
	} 
	else if  `y' == 2000 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11731085_SL140.txt", clear
		rename se_t099_001 mean_income

	} 
	else if  `y' == 2010 {
		import delimited using "$raw_data\Social Explorer\Income\\`y'\Census Tract\R11739176_SL140.txt", clear
		rename se_t062_001 mean_income
	}

	renpfix geo_		
	// FIPS ID for merging in MSA Crosswalk
	gen countyfips = state*1000 + county

	gen year = `y'
	// Data cleaning and updating so that it matches the 2003 OMB Crosswalk
	drop if state == 72 // get rid of PR because it has a metro repeat with Abilene TX 
*	destring countyfips fips, replace
	replace countyfips = 12086 if countyfips == 12025
	
	// Merge in the crosswalk
	merge m:1 countyfips using "$path\Raw_Data\2003_crosswalk_all_id_bc.dta"
	keep if _merge == 3
	drop _merge
	
	keep year metro countyfips fips mean_income
	
	
	cap append using `temp_mean_income'
	save `temp_mean_income', replace

}

use `temp_fam_inc', clear
merge m:1 year metro countyfips fips using `temp_mean_income'
keep if _merge == 3
drop _merge
save `temp_fam_inc', replace
*cap rm temp_mean_income.dta

********************************************************************************
* Import Housing Values By Year
********************************************************************************
// Ensure that we aren't appending to an existing file
*cap rm temp_house_value.dta
tempfile temp_house_value


foreach y of local year{
	clear all
	// This If-Else clause imports the relevant data for each year and renames the
	// variables for each income bracket
	if `y' == 1980 {
		import delimited using "$raw_data\Social Explorer\Housing\\`y'\Census Tract\R11727888_SL50.txt", clear
		rename stf1_t046_001 households
		rename stf1_t047_001 house_value
		
		
	} 
	else if  `y' == 1990 {
		import delimited using "$raw_data\Social Explorer\Housing\\`y'\Census Tract\R11728692_SL140.txt", clear
		rename se_t079_001 households
		rename se_t080_001 house_value
	} 
	else if  `y' == 2000 {
		import delimited using "$raw_data\Social Explorer\Housing\\`y'\Census Tract\R11728688_SL140.txt", clear
		rename se_t162_001 households
		rename se_t163_001 house_value

	} 
	else if  `y' == 2010 {
		import delimited using "$raw_data\Social Explorer\Housing\\`y'\Census Tract\R11740923_SL140.txt", clear
		rename se_t100_001 households
		rename se_t101_001 house_value
	}

	renpfix geo_		
	// FIPS ID for merging in MSA Crosswalk
	gen countyfips = state*1000 + county

	gen year = `y'
	// Data cleaning and updating so that it matches the 2003 OMB Crosswalk
	drop if state == 72 // get rid of PR because it has a metro repeat with Abilene TX 
*	destring countyfips fips, replace
	replace countyfips = 12086 if countyfips == 12025
	
	// Merge in the crosswalk
	merge m:1 countyfips using "$path\Raw_Data\2003_crosswalk_all_id_bc.dta"
	keep if _merge == 3
	drop _merge
	
	keep year metro countyfips fips households house_value
	
	
	
	cap append using `temp_house_value'
	save `temp_house_value', replace
}

use `temp_fam_inc', clear
merge m:1 year metro countyfips fips using `temp_house_value'
keep if _merge == 3
drop _merge
save `temp_fam_inc', replace
*cap rm temp_house_value.dta

********************************************************************************
* Final Data Set
********************************************************************************

order year metro countyfips fips median_income households house_value

sort year metro countyfips fips bracket_no
format fips %16.0f
tostring fips, force replace format(%20.0f)
gen ind_base = 1
save "$output\base_data.dta", replace




********************************************
*** Education Data 
******************************************


*******************************************************
* 0. Load the crosswalk file
*******************************************************
use "$path/Raw_Data/2003_crosswalk_all_id_bc.dta", clear
tempfile crosswalk
save `crosswalk', replace

*******************************************************
* 1980
*******************************************************
import delimited "$path/Raw_Data/Social Explorer/Education/1980/Census Tract/R11721554_SL50.txt", ///
    delimiter("\t") clear


foreach var of varlist geo_* {
    local newname = substr("`var'", 5, .)
    rename `var' `newname'
}

* Create education variables
rename se_t031_001 population
rename se_t031_002 elementary
rename se_t031_003 somehighschool
rename se_t031_005 allhighschool
rename se_t031_006 somecollege
rename se_t031_008 allcollege


gen countyfips = state * 1000 + county
gen ffips = string(countyfips) + string(tract, "%06.0f")

* (Optional check: to see the proportion of matches)
*tostring fips, force replace
*gen match = (fips == ffips)
*summarize match


replace countyfips = 12086 if countyfips == 12025

* Merge with crosswalk (keeping the metro variable)
merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge
* Add the year variable
gen year = 1980


collapse (sum) population elementary somehighschool allhighschool somecollege allcollege, ///
    by(year metro countyfips fips)


tempfile tab80
save `tab80', replace

*******************************************************
* 1990
*******************************************************
import delimited "$path/Raw_Data/Social Explorer/Education/1990/Census Tract/R11728848_SL140.txt", ///
    delimiter("\t") clear


foreach var of varlist geo_* {
    local newname = substr("`var'", 5, .)
    rename `var' `newname'
}

gen countyfips = state * 1000 + county
gen ffips = string(countyfips) + string(tract, "%06.0f")

* (Optional: check if fips equals ffips)
* gen match = (fips == ffips)
* summarize match

replace countyfips = 12086 if countyfips == 12025


gen population = se_t022_001
egen allcollege = rowtotal(se_t022_005 se_t022_006)
egen somecollege = rowtotal(se_t022_004 se_t022_005 se_t022_006)
gen allhighschool = se_t022_003
gen elementary    = se_t022_002

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge
gen year = 1990

collapse (sum) population elementary allhighschool somecollege allcollege, ///
    by(year metro countyfips fips)

tempfile tab90
save `tab90', replace

*******************************************************
* 2000
*******************************************************
import delimited "$path/Raw_Data/Social Explorer/Education/2000/Census Tract/R11728843_SL140.txt", ///
    delimiter("\t") clear

* Remove "geo_" prefix from variable names
foreach var of varlist geo_* {
    local newname = substr("`var'", 5, .)
    rename `var' `newname'
}

gen countyfips = state * 1000 + county
gen ffips = string(countyfips) + string(tract, "%06.0f")

replace countyfips = 12086 if countyfips == 12025

gen population   = se_t040_001
gen elementary   = se_t040_002
gen allhighschool= se_t040_003
egen somecollege  = rowtotal(se_t040_004 se_t040_005 se_t040_006 se_t040_007 se_t040_008)
egen allcollege   = rowtotal(se_t040_005 se_t040_006 se_t040_007 se_t040_008)

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge

gen year = 2000

collapse (sum) population elementary allhighschool somecollege allcollege, ///
    by(year metro countyfips fips)

tempfile tab00
save `tab00', replace

*******************************************************
* 2010
*******************************************************
import delimited "$path/Raw_Data/Social Explorer/Education/2010/Census Tract/R11740917_SL140.txt", ///
    delimiter("\t") clear

foreach var of varlist geo_* {
    local newname = substr("`var'", 5, .)
    rename `var' `newname'
}

gen countyfips = state * 1000 + county
gen ffips = string(countyfips) + string(tract, "%06.0f")

replace countyfips = 12086 if countyfips == 12025

gen population   = se_t025_001
gen elementary   = se_t025_002
gen allhighschool= se_t025_003
egen somecollege  = rowtotal(se_t025_004 se_t025_005 se_t025_006 se_t025_007 se_t025_008)
egen allcollege   = rowtotal(se_t025_005 se_t025_006 se_t025_007 se_t025_008)

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge

gen year = 2010

collapse (sum) population elementary allhighschool somecollege allcollege, ///
    by(year metro countyfips fips)

tempfile tab10
save `tab10', replace



* Start with 1980 data
use `tab80', clear

* Append the remaining years
append using `tab90'
append using `tab00'
append using `tab10'

* Keep only the common variables in the final output
keep year metro countyfips fips population elementary allhighschool somecollege allcollege


format fips %16.0f
tostring fips, force replace format(%20.0f)
keep year metro fips countyfips population allcollege 
gen total = population 
rename allcollege college 

* Save the final dataset
gen ind_edu = 1
save "$output/education_stats.dta", replace











******************************************
*** Children Data 
******************************************






*
* This do–file replicates the R children_data processing.
* CSV file names:
*   1980: nhgis0010_ds111_1980_tract_02498.csv
*   1990: nhgis0003_ds125_1990_tract_141.csv
*   2000: nhgis0032_ds151_2000_tract.csv
*   2010: nhgis0015_ds192_20125_2012_tract.csv
*
* Only the fips construction is modified to match the R code.
* Additionally, for 2010 we drop any existing year variable and generate year=2010.
********************************************************************************

* Set global paths

local year 1980 1990 2000 2010

********************************************************************************
* Process Children Data for Each Year and Append
********************************************************************************


****************
** 1980 
****************
import delimited using "$raw_data\Social Explorer\Children\1980\nhgis0010_ds111_1980_tract_02498.csv", clear
renpfix dzk f_x
gen countyfips = statea * 1000 + countya   // countyfips remains numeric

gen uid = subinstr(gisjoin, "G", "", .)
gen subid = ""
replace subid = string(statea, "%02.0f") + "00" + string(smsaa, "%04.0f") + string(countya, "%03.0f") if missing(placea)
replace subid = string(statea, "%02.0f") + "00" + string(smsaa, "%04.0f") + string(countya, "%03.0f") + string(placea, "%04.0f") if !missing(placea)
gen getTract = substr(uid, strlen(subid)+1, .)
replace getTract = getTract + "00" if strlen(getTract)==4
gen fips = string(countyfips, "%0.0f") + getTract

replace countyfips = 12086 if countyfips == 12025
merge m:1 countyfips using "$path/Raw_Data/2003_crosswalk_all_id_bc.dta"
keep if _merge==3
drop _merge

collapse (sum) f_x*, by(year metro countyfips fips)

** Generate count of kids 
egen f_count_ct_kids1 = rowtotal(f_x001 f_x018 f_x052 f_x069 f_x103 f_x120)
egen f_count_ct_kids2 = rowtotal(f_x002 f_x019 f_x053 f_x070 f_x104 f_x121)
egen f_count_ct_kids3 = rowtotal(f_x003 f_x020 f_x054 f_x071 f_x105 f_x122)
egen f_count_ct_kids4 = rowtotal(f_x004 f_x021 f_x055 f_x072 f_x106 f_x123)
egen f_count_ct_kids5 = rowtotal(f_x005 f_x022 f_x056 f_x073 f_x107 f_x124)
egen f_count_ct_kids6 = rowtotal(f_x006 f_x023 f_x057 f_x074 f_x108 f_x125)
egen f_count_ct_kids7 = rowtotal(f_x007 f_x024 f_x058 f_x075 f_x109 f_x126)
egen f_count_ct_kids8 = rowtotal(f_x008 f_x025 f_x059 f_x076 f_x110 f_x127)
egen f_count_ct_kids9 = rowtotal(f_x009 f_x026 f_x060 f_x077 f_x111 f_x128)
egen f_count_ct_kids10 = rowtotal(f_x010 f_x027 f_x061 f_x078 f_x112 f_x129)
egen f_count_ct_kids11 = rowtotal(f_x011 f_x028 f_x062 f_x079 f_x113 f_x130)
egen f_count_ct_kids12 = rowtotal(f_x012 f_x029 f_x063 f_x080 f_x114 f_x131)
egen f_count_ct_kids13 = rowtotal(f_x013 f_x030 f_x064 f_x081 f_x115 f_x132)
egen f_count_ct_kids14 = rowtotal(f_x014 f_x031 f_x065 f_x082 f_x116 f_x133)
egen f_count_ct_kids15 = rowtotal(f_x015 f_x032 f_x066 f_x083 f_x117 f_x134)
egen f_count_ct_kids16 = rowtotal(f_x016 f_x033 f_x067 f_x084 f_x118 f_x135)
egen f_count_ct_kids17 = rowtotal(f_x017 f_x034 f_x068 f_x085 f_x119 f_x136)


** Generate counts without kids 
egen f_count_ct_wokids1 = rowtotal(f_x035 f_x086 f_x137)
egen f_count_ct_wokids2 = rowtotal(f_x036 f_x087 f_x138)
egen f_count_ct_wokids3 = rowtotal(f_x037 f_x088 f_x139)
egen f_count_ct_wokids4 = rowtotal(f_x038 f_x089 f_x140)
egen f_count_ct_wokids5 = rowtotal(f_x039 f_x090 f_x141)
egen f_count_ct_wokids6 = rowtotal(f_x040 f_x091 f_x142)
egen f_count_ct_wokids7 = rowtotal(f_x041 f_x092 f_x143)
egen f_count_ct_wokids8 = rowtotal(f_x042 f_x093 f_x144)
egen f_count_ct_wokids9 = rowtotal(f_x043 f_x094 f_x145)
egen f_count_ct_wokids10 = rowtotal(f_x044 f_x095 f_x146)
egen f_count_ct_wokids11 = rowtotal(f_x045 f_x096 f_x147)
egen f_count_ct_wokids12 = rowtotal(f_x046 f_x097 f_x148)
egen f_count_ct_wokids13 = rowtotal(f_x047 f_x098 f_x149)
egen f_count_ct_wokids14 = rowtotal(f_x048 f_x099 f_x150)
egen f_count_ct_wokids15 = rowtotal(f_x049 f_x100 f_x151)
egen f_count_ct_wokids16 = rowtotal(f_x050 f_x101 f_x152)
egen f_count_ct_wokids17 = rowtotal(f_x051 f_x102 f_x153)


keep year metro countyfips fips f_count_ct*
reshape long f_count_ct_kids f_count_ct_wokids, i(year metro countyfips fips) j(bracket_no)
replace bracket_no=bracket_no+1

** Merge the income-brackets
merge m:1 year bracket_no using "$path\Raw_Data\inc_brackets.dta"
tab year if _merge!=3
keep if _merge==3
drop _merge
gen income = (low + high)/2

replace countyfips = 12025 if countyfips == 12086 
replace fips = string(countyfips) + fips
replace countyfips = 12086 if countyfips == 12025

sort year metro countyfips fips bracket_no

tempfile children_data
save `children_data', replace 


****************
** 1990 
****************
import delimited using "$raw_data\Social Explorer\Children\1990\nhgis0003_ds125_1990_tract_141.csv", clear
renpfix fhi f_x
gen countyfips = statea * 1000 + countya   // countyfips remains numeric

tostring anpsadpi, replace
keep if strpos(anpsadpi, "Tract") > 0
split anpsadpi, parse(" ") generate(part)
destring part2, replace
gen tractI = part2
drop part*
gen newtract_temp = string(tractI, "%04.2f")
replace newtract_temp = subinstr(newtract_temp, ".", "", .)
destring newtract_temp, replace
gen newtract = string(newtract_temp, "%06.0f")
drop newtract_temp
gen fips = string(countyfips, "%0.0f") + newtract

replace countyfips = 12086 if countyfips == 12025
merge m:1 countyfips using "$path/Raw_Data/2003_crosswalk_all_id_bc.dta"
keep if _merge==3
drop _merge
collapse (sum) f_x*, by(year metro countyfips fips)




** families with kids 
* Loop over the indices corresponding to f_x003, f_x007, …, f_x299
forvalues i = 3(4)299 {
    * Construct the variable names using 3-digit formatting
    local var1 = "f_x" + string(`i'-2, "%03.0f")
    local var2 = "f_x" + string(`i'-1, "%03.0f")
    local var3 = "f_x" + string(`i', "%03.0f")
    
    di "Replacing `var3' with the sum of `var1', `var2', and `var3'"
    replace `var3' = `var1' + `var2' + `var3'
}


egen f_count_ct_kids1  = rowtotal(f_x003 f_x007 f_x011)
egen f_count_ct_kids2  = rowtotal(f_x015 f_x019 f_x023)
egen f_count_ct_kids3  = rowtotal(f_x027 f_x031 f_x035)
egen f_count_ct_kids4  = rowtotal(f_x039 f_x043 f_x047)
egen f_count_ct_kids5  = rowtotal(f_x051 f_x055 f_x059)
egen f_count_ct_kids6  = rowtotal(f_x063 f_x067 f_x071)
egen f_count_ct_kids7  = rowtotal(f_x075 f_x079 f_x083)
egen f_count_ct_kids8  = rowtotal(f_x087 f_x091 f_x095)
egen f_count_ct_kids9  = rowtotal(f_x099 f_x103 f_x107)
egen f_count_ct_kids10 = rowtotal(f_x111 f_x115 f_x119)
egen f_count_ct_kids11 = rowtotal(f_x123 f_x127 f_x131)
egen f_count_ct_kids12 = rowtotal(f_x135 f_x139 f_x143)
egen f_count_ct_kids13 = rowtotal(f_x147 f_x151 f_x155)
egen f_count_ct_kids14 = rowtotal(f_x159 f_x163 f_x167)
egen f_count_ct_kids15 = rowtotal(f_x171 f_x175 f_x179)
egen f_count_ct_kids16 = rowtotal(f_x183 f_x187 f_x191)
egen f_count_ct_kids17 = rowtotal(f_x195 f_x199 f_x203)
egen f_count_ct_kids18 = rowtotal(f_x207 f_x211 f_x215)
egen f_count_ct_kids19 = rowtotal(f_x219 f_x223 f_x227)
egen f_count_ct_kids20 = rowtotal(f_x231 f_x235 f_x239)
egen f_count_ct_kids21 = rowtotal(f_x243 f_x247 f_x251)
egen f_count_ct_kids22 = rowtotal(f_x255 f_x259 f_x263)
egen f_count_ct_kids23 = rowtotal(f_x267 f_x271 f_x275)
egen f_count_ct_kids24 = rowtotal(f_x279 f_x283 f_x287)
egen f_count_ct_kids25 = rowtotal(f_x291 f_x295 f_x299)




** families without kids 
egen f_count_ct_wokids1  = rowtotal(f_x004 f_x008 f_x012)
egen f_count_ct_wokids2  = rowtotal(f_x016 f_x020 f_x024)
egen f_count_ct_wokids3  = rowtotal(f_x028 f_x032 f_x036)
egen f_count_ct_wokids4  = rowtotal(f_x040 f_x044 f_x048)
egen f_count_ct_wokids5  = rowtotal(f_x052 f_x056 f_x060)
egen f_count_ct_wokids6  = rowtotal(f_x064 f_x068 f_x072)
egen f_count_ct_wokids7  = rowtotal(f_x076 f_x080 f_x084)
egen f_count_ct_wokids8  = rowtotal(f_x088 f_x092 f_x096)
egen f_count_ct_wokids9  = rowtotal(f_x100 f_x104 f_x108)
egen f_count_ct_wokids10 = rowtotal(f_x112 f_x116 f_x120)
egen f_count_ct_wokids11 = rowtotal(f_x124 f_x128 f_x132)
egen f_count_ct_wokids12 = rowtotal(f_x136 f_x140 f_x144)
egen f_count_ct_wokids13 = rowtotal(f_x148 f_x152 f_x156)
egen f_count_ct_wokids14 = rowtotal(f_x160 f_x164 f_x168)
egen f_count_ct_wokids15 = rowtotal(f_x172 f_x176 f_x180)
egen f_count_ct_wokids16 = rowtotal(f_x184 f_x188 f_x192)
egen f_count_ct_wokids17 = rowtotal(f_x196 f_x200 f_x204)
egen f_count_ct_wokids18 = rowtotal(f_x208 f_x212 f_x216)
egen f_count_ct_wokids19 = rowtotal(f_x220 f_x224 f_x228)
egen f_count_ct_wokids20 = rowtotal(f_x232 f_x236 f_x240)
egen f_count_ct_wokids21 = rowtotal(f_x244 f_x248 f_x252)
egen f_count_ct_wokids22 = rowtotal(f_x256 f_x260 f_x264)
egen f_count_ct_wokids23 = rowtotal(f_x268 f_x272 f_x276)
egen f_count_ct_wokids24 = rowtotal(f_x280 f_x284 f_x288)
egen f_count_ct_wokids25 = rowtotal(f_x292 f_x296 f_x300)


keep year metro countyfips fips f_count_ct*
reshape long f_count_ct_kids f_count_ct_wokids, i(year metro countyfips fips) j(bracket_no)
replace bracket_no=bracket_no+1

** Merge the income-brackets
merge m:1 year bracket_no using "$path\Raw_Data\inc_brackets.dta"
tab year if _merge!=3
keep if _merge==3
drop _merge
gen income = (low + high)/2

replace countyfips = 12025 if countyfips == 12086 
replace fips = string(countyfips) + fips
replace countyfips = 12086 if countyfips == 12025
sort year metro countyfips fips bracket_no


append using `children_data'
save `children_data', replace 





****************
** 2000
****************
import delimited using "$raw_data\Social Explorer\Children\2000\nhgis0032_ds151_2000_tract.csv", clear
renpfix gyi f_x
gen countyfips = statea * 1000 + countya   // countyfips remains numeric

split name, parse(" ") generate(npart)
gen tractI = npart3
destring tractI, replace
gen newtract_temp = string(tractI, "%04.2f")
replace newtract_temp = subinstr(newtract_temp, ".", "", .)
destring newtract_temp, replace
gen newtract = string(newtract_temp, "%06.0f")
drop newtract_temp
drop npart*
gen fips = string(countyfips, "%0.0f") + newtract

replace countyfips = 12086 if countyfips == 12025
merge m:1 countyfips using "$path/Raw_Data/2003_crosswalk_all_id_bc.dta"
keep if _merge==3
drop _merge
collapse (sum) f_x*, by(year metro countyfips fips)



** families with kids 
egen f_count_ct_kids1  = rowtotal(f_x001 f_x033 f_x065)
egen f_count_ct_kids2  = rowtotal(f_x002 f_x034 f_x066)
egen f_count_ct_kids3  = rowtotal(f_x003 f_x035 f_x067)
egen f_count_ct_kids4  = rowtotal(f_x004 f_x036 f_x068)
egen f_count_ct_kids5  = rowtotal(f_x005 f_x037 f_x069)
egen f_count_ct_kids6  = rowtotal(f_x006 f_x038 f_x070)
egen f_count_ct_kids7  = rowtotal(f_x007 f_x039 f_x071)
egen f_count_ct_kids8  = rowtotal(f_x008 f_x040 f_x072)
egen f_count_ct_kids9  = rowtotal(f_x009 f_x041 f_x073)
egen f_count_ct_kids10 = rowtotal(f_x010 f_x042 f_x074)
egen f_count_ct_kids11 = rowtotal(f_x011 f_x043 f_x075)
egen f_count_ct_kids12 = rowtotal(f_x012 f_x044 f_x076)
egen f_count_ct_kids13 = rowtotal(f_x013 f_x045 f_x077)
egen f_count_ct_kids14 = rowtotal(f_x014 f_x046 f_x078)
egen f_count_ct_kids15 = rowtotal(f_x015 f_x047 f_x079)
egen f_count_ct_kids16 = rowtotal(f_x016 f_x048 f_x080)



** families without kids 

egen f_count_ct_wokids1  = rowtotal(f_x017 f_x049 f_x081)
egen f_count_ct_wokids2  = rowtotal(f_x018 f_x050 f_x082)
egen f_count_ct_wokids3  = rowtotal(f_x019 f_x051 f_x083)
egen f_count_ct_wokids4  = rowtotal(f_x020 f_x052 f_x084)
egen f_count_ct_wokids5  = rowtotal(f_x021 f_x053 f_x085)
egen f_count_ct_wokids6  = rowtotal(f_x022 f_x054 f_x086)
egen f_count_ct_wokids7  = rowtotal(f_x023 f_x055 f_x087)
egen f_count_ct_wokids8  = rowtotal(f_x024 f_x056 f_x088)
egen f_count_ct_wokids9  = rowtotal(f_x025 f_x057 f_x089)
egen f_count_ct_wokids10 = rowtotal(f_x026 f_x058 f_x090)
egen f_count_ct_wokids11 = rowtotal(f_x027 f_x059 f_x091)
egen f_count_ct_wokids12 = rowtotal(f_x028 f_x060 f_x092)
egen f_count_ct_wokids13 = rowtotal(f_x029 f_x061 f_x093)
egen f_count_ct_wokids14 = rowtotal(f_x030 f_x062 f_x094)
egen f_count_ct_wokids15 = rowtotal(f_x031 f_x063 f_x095)
egen f_count_ct_wokids16 = rowtotal(f_x032 f_x064 f_x096)



keep year metro countyfips fips f_count_ct*
reshape long f_count_ct_kids f_count_ct_wokids, i(year metro countyfips fips) j(bracket_no)
replace bracket_no=bracket_no+1

** Merge the income-brackets
merge m:1 year bracket_no using "$path\Raw_Data\inc_brackets.dta"
tab year if _merge!=3
keep if _merge==3
drop _merge
gen income = (low + high)/2

replace fips = string(countyfips) + fips
sort year metro countyfips fips bracket_no



append using `children_data'
save `children_data', replace 





****************
** 2010
****************
import delimited using "$raw_data\Social Explorer\Children\2010\nhgis0015_ds192_20125_2012_tract.csv", clear
ds rcsm*
drop `r(varlist)'
renpfix rcse f_x
drop year
gen year = 2010
gen countyfips = statea * 1000 + countya   // countyfips remains numeric

tostring name_e, replace
split name_e, parse(" ") generate(e)
gen tractI_clean = subinstr(e3, ",", "", .)
destring tractI_clean, replace
gen tractI_clean2 = string(tractI_clean, "%4.2f")
replace tractI_clean2 = subinstr(tractI_clean2, ".", "", .)
destring tractI_clean2, replace force
rename tractI_clean2 tractI
drop e* tractI_clean
gen newtract = string(tractI, "%06.0f")
gen fips = string(countyfips, "%0.0f") + newtract


replace countyfips = 12086 if countyfips == 12025
merge m:1 countyfips using "$path/Raw_Data/2003_crosswalk_all_id_bc.dta"
keep if _merge == 3
drop _merge



replace countyfips = 12086 if countyfips == 12025
merge m:1 countyfips using "$path/Raw_Data/2003_crosswalk_all_id_bc.dta"
keep if _merge==3
drop _merge
collapse (sum) f_x*, by(year metro countyfips fips)


** family count kids 
egen f_count_ct_kids1  = rowtotal(f_x004 f_x040 f_x075)
egen f_count_ct_kids2  = rowtotal(f_x005 f_x041 f_x076)
egen f_count_ct_kids3  = rowtotal(f_x006 f_x042 f_x077)
egen f_count_ct_kids4  = rowtotal(f_x007 f_x043 f_x078)
egen f_count_ct_kids5  = rowtotal(f_x008 f_x044 f_x079)
egen f_count_ct_kids6  = rowtotal(f_x009 f_x045 f_x080)
egen f_count_ct_kids7  = rowtotal(f_x010 f_x046 f_x081)
egen f_count_ct_kids8  = rowtotal(f_x011 f_x047 f_x082)
egen f_count_ct_kids9  = rowtotal(f_x012 f_x048 f_x083)
egen f_count_ct_kids10 = rowtotal(f_x013 f_x049 f_x084)
egen f_count_ct_kids11 = rowtotal(f_x014 f_x050 f_x085)
egen f_count_ct_kids12 = rowtotal(f_x015 f_x051 f_x086)
egen f_count_ct_kids13 = rowtotal(f_x016 f_x052 f_x087)
egen f_count_ct_kids14 = rowtotal(f_x017 f_x053 f_x088)
egen f_count_ct_kids15 = rowtotal(f_x018 f_x054 f_x089)
egen f_count_ct_kids16 = rowtotal(f_x019 f_x055 f_x090)



** family count without kids 
egen f_count_ct_wokids1  = rowtotal(f_x021 f_x057 f_x092)
egen f_count_ct_wokids2  = rowtotal(f_x022 f_x058 f_x093)
egen f_count_ct_wokids3  = rowtotal(f_x023 f_x059 f_x094)
egen f_count_ct_wokids4  = rowtotal(f_x024 f_x060 f_x095)
egen f_count_ct_wokids5  = rowtotal(f_x025 f_x061 f_x096)
egen f_count_ct_wokids6  = rowtotal(f_x026 f_x062 f_x097)
egen f_count_ct_wokids7  = rowtotal(f_x027 f_x063 f_x098)
egen f_count_ct_wokids8  = rowtotal(f_x028 f_x064 f_x099)
egen f_count_ct_wokids9  = rowtotal(f_x029 f_x065 f_x100)
egen f_count_ct_wokids10 = rowtotal(f_x030 f_x066 f_x101)
egen f_count_ct_wokids11 = rowtotal(f_x031 f_x067 f_x102)
egen f_count_ct_wokids12 = rowtotal(f_x032 f_x068 f_x103)
egen f_count_ct_wokids13 = rowtotal(f_x033 f_x069 f_x104)
egen f_count_ct_wokids14 = rowtotal(f_x034 f_x070 f_x105)
egen f_count_ct_wokids15 = rowtotal(f_x035 f_x071 f_x106)
egen f_count_ct_wokids16 = rowtotal(f_x036 f_x072 f_x107)




keep year metro countyfips fips f_count_ct*
reshape long f_count_ct_kids f_count_ct_wokids, i(year metro countyfips fips) j(bracket_no)
replace bracket_no=bracket_no+1

** Merge the income-brackets
merge m:1 year bracket_no using "$path\Raw_Data\inc_brackets.dta"
tab year if _merge!=3
keep if _merge==3
drop _merge
gen income = (low + high)/2

replace fips = string(countyfips) + fips
sort year metro countyfips fips bracket_no



append using `children_data'
save `children_data', replace 

gen ind_children = 1
save "$output/children_data", replace 








************************************
** Mean Family Income 
***********************************

use "$path/Raw_Data/2003_crosswalk_all_id_bc.dta", clear
tempfile crosswalk
save `crosswalk', replace

* Load income brackets if needed
* use "../../Lookups/Income Brackets/inc_brackets.dta", clear
*temfile inc_brackets
*save `inc_bracket', replace

*******************************************************
* 1980
*******************************************************


import delimited "$path/Raw_Data/Social Explorer/AggregateIncome/1980/nhgis0039_ds111_1980_tract_02498.csv", delimiter(",") clear

gen uid = subinstr(gisjoin, "G", "", .)

gen subid = ""
replace subid = string(statea, "%02.0f") + "00" + string(smsaa, "%04.0f") + string(countya, "%03.0f") if missing(placea)
replace subid = string(statea, "%02.0f") + "00" + string(smsaa, "%04.0f") + string(countya, "%03.0f") + string(placea, "%04.0f") if !missing(placea)


gen uidChar = strlen(uid)
gen subidChar = strlen(subid)
gen getTract = substr(uid, subidChar+1, uidChar - subidChar)

gen newtract = cond(strlen(getTract) == 4, getTract + "00", getTract)

gen countyfips = statea * 1000 + countya
gen fips = string(countyfips) + newtract

replace countyfips = 12086 if countyfips == 12025

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


gen kids_income = dzm001 + dzm002 + dzm004 + dzm005 + dzm007 + dzm008
gen no_kids_income = dzm003 + dzm006 + dzm009


collapse (sum) kids_income no_kids_income, by(metro countyfips fips)
gen year = 1980

tempfile temp80
save `temp80', replace

*******************************************************
* 1990 
*******************************************************

import delimited "$path/Raw_Data/Social Explorer/AggregateIncome/1990/nhgis0036_ds123_1990_tract.csv", delimiter(",") clear


gen str newtract = word(anpsadpi, 2)

destring newtract, replace force
gen str_temp = string(newtract, "%04.2f")
replace str_temp = subinstr(str_temp, ".", "", .)
gen newtract_final = string(real(str_temp), "%06.0f")
drop newtract
rename newtract_final newtract


gen countyfips = statea * 1000 + countya
gen fips = string(countyfips) + newtract

replace countyfips = 12086 if countyfips == 12025

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


egen kids_income = rowtotal(e0t001 e0t003 e0t005)
egen no_kids_income = rowtotal(e0t002 e0t004 e0t006)

collapse (sum) kids_income no_kids_income, by(metro countyfips fips)
gen year = 1990

tempfile temp90
save `temp90', replace

*******************************************************
* 2000 
*******************************************************

import delimited "$path/Raw_Data/Social Explorer/AggregateIncome/2000/nhgis0037_ds151_2000_tract.csv", delimiter(",") clear

gen countyfips = statea * 1000 + countya

tostring tracta, format(%06.0f) gen(newtract)
gen fips = string(countyfips) + newtract

replace countyfips = 12086 if countyfips == 12025

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


egen kids_income = rowtotal(gyo001 gyo003 gyo005)
egen no_kids_income = rowtotal(gyo002 gyo004 gyo006)

collapse (sum) kids_income no_kids_income, by(metro countyfips fips)
gen year = 2000

tempfile temp00
save `temp00', replace

*******************************************************
* 2010 
*******************************************************

import delimited "$path/Raw_Data/Social Explorer/AggregateIncome/2010/nhgis0038_ds192_20125_2012_tract.csv", delimiter(",") clear

gen countyfips = statea * 1000 + countya
gen fips = string(countyfips) + string(tracta, "%06.0f")
replace countyfips = 12086 if countyfips == 12025

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


egen kids_income = rowtotal(rcpe003 rcpe007 rcpe010)
egen no_kids_income = rowtotal(rcpe004 rcpe008 rcpe011)

collapse (sum) kids_income no_kids_income, by(metro countyfips fips)
gen year = 2010

tempfile temp10
save `temp10', replace

*******************************************************
* 2010 Mean Family Income Data
*******************************************************
* Note: 2010 has a lot of 0 values for mean_kids_income compared to other years
* We replace the zero values with mean family income 

import delimited "$path/Raw_Data/Social Explorer/Income/2010/Census Tract/R11739176_SL140.txt", delimiter("\t") clear


foreach var of varlist geo_* {
    local newname = substr("`var'", 5, .)
    rename `var' `newname'
}

gen countyfips = state * 1000 + county
gen ffips = string(countyfips) + string(tract, "%06.0f")
replace countyfips = 12086 if countyfips == 12025


gen mean_income = se_t062_001
gen year = 2010

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


keep year metro countyfips fips mean_income
gen str_fips =  string(fips, "%11.0f")
drop fips
rename str_fips fips

tempfile meanfam10
save `meanfam10', replace

*******************************************************
* Combine Aggregate Income Data (1980, 1990, 2000, 2010)
*******************************************************

use `temp80', clear
append using `temp90'
append using `temp00'
append using `temp10'

tempfile temp_income
save `temp_income', replace

*******************************************************
* Process Children Data to Compute Counts
* Calculate the mean income 
* Kids dataset
use "$output/children_data.dta", clear

collapse (sum) kids = f_count_ct_kids no_kids = f_count_ct_wokids, by(year metro countyfips fips)
tempfile countKids
save `countKids', replace


*******************************************************
* Merge Income and Children Data to Calculate Mean Incomes
*******************************************************

use `countKids', clear
merge 1:1 year metro countyfips fips using `temp_income'
keep if _merge == 3
drop _merge

gen mean_kids_income = kids_income / kids
gen mean_no_kids_income = no_kids_income / no_kids


merge 1:1 year metro countyfips fips using `meanfam10', keepusing(mean_income)


replace mean_kids_income = mean_income if mean_kids_income==0 & year==2010
replace mean_no_kids_income = mean_income if mean_no_kids_income==0 & year==2010


keep year metro countyfips fips mean_kids_income mean_no_kids_income
gen ind_kids_income = 1
save "$output/families_mean_income.dta", replace



















************************************
** Housing Data 
***********************************



use "$path/Raw_Data/2003_crosswalk_all_id_bc.dta", clear
tempfile crosswalk
save `crosswalk', replace

* Load income brackets if needed
* use "../../Lookups/Income Brackets/inc_brackets.dta", clear
* tempfile inc_brackets
* save `inc_bracket', replace

* Define the years to process 
local years "1980 1990 2000 2010"

*******************************************************
* 1. Median Gross Rent at the Census Tract Level (1980–2010)
*******************************************************
tempfile rent_median_all

save `rent_median_all', emptyok replace

foreach yr of local years {
    di "Processing Gross Rent for year `yr'"
    
    local path = "$path/Raw_Data/Social Explorer/Rent/`yr'/"
    
     if "`yr'"=="1980" {
         local csvfile = "`path'nhgis0049_ds107_1980_tract.csv"
    }
	else if "`yr'"=="1990" {
         local csvfile = "`path'nhgis0050_ds123_1990_tract.csv"
    }
    else if "`yr'"=="2000" {
         local csvfile = "`path'nhgis0051_ds151_2000_tract.csv"
    }
    else {
         local csvfile = "`path'nhgis0052_ds191_20125_tract.csv"
    }
    
    import delimited using "`csvfile'", delimiter(",") clear
    

    gen countyfips = statea * 1000 + countya
    
    if `yr' == 1980 {
        gen tractchar = word(areaname, 6)
        destring tractchar, replace force
        gen tempstr = string(tractchar, "%04.2f")
        replace tempstr = subinstr(tempstr, ".", "", .)
        gen getTract = string(real(tempstr), "%06.0f")
        drop tractchar tempstr
    }
    else if `yr' == 1990 {
        gen tractchar = word(anpsadpi, 2)
        destring tractchar, replace force
        gen tempstr = string(tractchar, "%04.2f")
        replace tempstr = subinstr(tempstr, ".", "", .)
        gen getTract = string(real(tempstr), "%06.0f")
        drop tractchar tempstr
    }
    else {
        gen getTract = string(tracta, "%06.0f")
    }
   
    gen fips = string(countyfips) + getTract
    
    replace countyfips = 12086 if countyfips == 12025

    if `yr' == 1980 {
        rename dfk001 rent
    }
    else if `yr' == 1990 {
        rename eyu001 rent
    }
    else if `yr' == 2000 {
        rename gbo001 rent
    }
    else if `yr' == 2010 {
        rename qzte001 rent
		replace year = "2010"
		destring year, force replace
    }
    
    merge m:1 countyfips using `crosswalk', keepusing(metro)
	keep if _merge == 3
	drop _merge
    
    collapse (median) median_rent = rent, by(year metro countyfips fips)
    append using `rent_median_all'
    save `rent_median_all', replace
	
}
drop county_str metro_str
drop if year==.
save `rent_median_all', replace  // This data matchesd perfectly 

*******************************************************
* 2. Median Owner Costs by Mortgage Status
*******************************************************
clear
tempfile owner_costs_all
save `owner_costs_all', emptyok replace

foreach yr of local years {
    di "Processing Owner Costs for year `yr'"
    
    local path = "$path/Raw_Data/Social Explorer/Owner Costs/`yr'/"
    
     if "`yr'"=="1980" {
         local csvfile = "`path'nhgis0053_ds107_1980_tract.csv"
    }
	else if "`yr'"=="1990" {
         local csvfile = "`path'nhgis0054_ds123_1990_tract.csv"
    }
    else if "`yr'"=="2000" {
         local csvfile = "`path'nhgis0055_ds151_2000_tract.csv"
    }
    else {
         local csvfile = "`path'nhgis0056_ds191_20125_tract.csv"
    }
    
    import delimited using "`csvfile'", delimiter(",") clear
    gen countyfips = statea * 1000 + countya
    
    if `yr' == 1980 {
        gen tractchar = word(areaname, 6)
        destring tractchar, replace force
        gen tempstr = string(tractchar, "%04.2f")
        replace tempstr = subinstr(tempstr, ".", "", .)
        gen getTract = string(real(tempstr), "%06.0f")
        drop tractchar tempstr
    }
    else if `yr' == 1990 {
        gen tractchar = word(anpsadpi, 2)
        destring tractchar, replace force
        gen tempstr = string(tractchar, "%04.2f")
        replace tempstr = subinstr(tempstr, ".", "", .)
        gen getTract = string(real(tempstr), "%06.0f")
        drop tractchar tempstr
    }
    else {
        gen getTract = string(tracta, "%06.0f")
    }
    
    gen fips = string(countyfips) + getTract
    replace countyfips = 12086 if countyfips == 12025
    
    if `yr' == 1980 {
        rename dfs001 costs_with_mortgage
        rename dfs002 costs_wo_mortgage
    }
    else if `yr' == 1990 {
        rename ey6001 costs_with_mortgage
        rename ey6002 costs_wo_mortgage
    }
    else if `yr' == 2000 {
		rename gcu001 costs_with_mortgage
        rename gcu002 costs_wo_mortgage
    }
    else if `yr' == 2010 {
		rename q0ge002 costs_with_mortgage
        rename q0ge003 costs_wo_mortgage
		replace year = "2010"
		destring year, force replace
    }
    
    merge m:1 countyfips using `crosswalk', keepusing(metro) 
	keep if _merge == 3
	drop _merge
    
    collapse (median) median_costs_mortgage = costs_with_mortgage (mean) median_costs_wo_mortgage = costs_wo_mortgage, by(year metro countyfips fips)
    
    append using `owner_costs_all'
    save `owner_costs_all', replace
}



save `owner_costs_all', replace  //This data matched perfectly 

*******************************************************
* 3. Merge Gross Rent and Owner Cost Data
*******************************************************
use `rent_median_all', clear
merge 1:1 year metro countyfips fips using `owner_costs_all'
keep if _merge == 3
drop _merge

tempfile rent_cost
save `rent_cost', replace
*******************************************************
* 4. Process Rent Counts for Each Year
*******************************************************
tempfile rentcounts_all
clear
save `rentcounts_all', emptyok replace

****1980****
************
import delimited using "$path/Raw_Data/Social Explorer/RentCounts/1980/nhgis0067_ds109_1980_tract_02498.csv", delimiter(",") clear
gen countyfips = statea * 1000 + countya


gen uid = subinstr(gisjoin, "G", "", .)

gen subid = ""
replace subid = string(statea, "%02.0f") + string(smsaa, "%05.0f") + string(countya, "%03.0f") + "0" if missing(placea)
replace subid = string(statea, "%02.0f") + string(smsaa, "%05.0f") + string(countya, "%03.0f") + string(placea, "%05.0f") if !missing(placea)


gen charid = strlen(uid)
gen charsubid = strlen(subid)


gen getTract = substr(uid, charsubid + 1, charid - charsubid)


replace getTract = getTract + "00" if strlen(getTract) == 4


gen fips = string(countyfips) + getTract

replace countyfips = 12086 if countyfips == 12025
merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


local dpuvars
forvalues i = 1(1)89 {
    if mod(`i',10) != 0 {
        local varname = string(`i', "%03.0f")
        local dpuvars `dpuvars' dpu`varname'
    }
}
egen cash_units = rowtotal(`dpuvars')

local dpuvars
forvalues i = 10(10)90 {
    local varname = string(`i', "%03.0f")
    local dpuvars `dpuvars' dpu`varname'
}

egen noncash_units = rowtotal(`dpuvars')


egen rental_units = rowtotal(dpu001-dpu090)

collapse (sum) cash_units noncash_units rental_units, by(year metro countyfips fips)
append using `rentcounts_all'
save `rentcounts_all', replace


****1990****
************
import delimited using "$path/Raw_Data/Social Explorer/RentCounts/1990/nhgis0058_ds125_1990_tract_141.csv", delimiter(",") clear
gen countyfips = statea * 1000 + countya

gen newtract = word(anpsadpi, 2)
destring newtract, replace force
gen tempstr = string(newtract, "%04.2f")
replace tempstr = subinstr(tempstr, ".", "", .)
gen getTract = string(real(tempstr), "%06.0f")
drop tempstr newtract

gen fips = string(countyfips) + getTract
replace countyfips = 12086 if countyfips == 12025


merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


local fcpvars
local usedvars
local ranges "1 16 18 33 35 50 52 67 69 84 86 101 103 118 120 135 137 152 154 169 171 186 188 203 205 220"

forvalues i = 1(2)25 {
    local start : word `i' of `ranges'
    local end : word `=`i'+1' of `ranges'
    
    forvalues j = `start'/`end' {
        local varname = string(`j', "%03.0f")
        local fcpvars `fcpvars' fcp`varname'
		local usedvars `usedvars' `j'
    }
}

egen cash_units = rowtotal(`fcpvars')

local remainingvars
forvalues k = 1/221 {
    if strpos(" `usedvars' ", " `k' ") == 0 {
        local varname = string(`k', "%03.0f")
        local remainingvars `remainingvars' fcp`varname'
    }
}


egen noncash_units  = rowtotal(`remainingvars')


egen rental_units = rowtotal(fcp001-fcp221)

collapse (sum) cash_units noncash_units rental_units, by(year metro countyfips fips)
append using `rentcounts_all'
save `rentcounts_all', replace


****2000****
************
import delimited using "$path/Raw_Data/Social Explorer/RentCounts/2000/nhgis0061_ds151_2000_tract.csv", delimiter(",") clear

gen countyfips = statea * 1000 + countya
gen getTract = string(tracta, "%06.0f")
gen fips = string(countyfips) + getTract
replace countyfips = 12086 if countyfips == 12025

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge

egen cash_units = rowtotal(gem001 gem003 gem005 gem007 gem009 gem011 gem013)
egen noncash_units = rowtotal(gem002 gem004 gem006 gem008 gem010 gem012 gem014)
egen rental_units = rowtotal(gem001-gem014)

collapse (sum) cash_units noncash_units rental_units, by(year metro countyfips fips)
append using `rentcounts_all'
save `rentcounts_all', replace

****2010****
************
import delimited using "$path/Raw_Data/Social Explorer/RentCounts/2010/nhgis0060_ds192_20125_tract.csv", delimiter(",") clear

gen countyfips = statea * 1000 + countya
gen getTract = string(tracta, "%06.0f")
gen fips = string(countyfips) + getTract
replace countyfips = 12086 if countyfips == 12025
replace year = "2010"
destring year, replace

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge

egen cash_units = rowtotal(rgue003 rgue020 rgue037 rgue054 rgue071 rgue088 rgue105), missing
egen noncash_units = rowtotal(rgue018 rgue035 rgue052 rgue069 rgue086 rgue103 rgue120), missing
egen rental_units = rowtotal(rgue002 rgue019 rgue036 rgue053 rgue070 rgue087 rgue104), missing


collapse (sum) cash_units noncash_units rental_units, by(year metro countyfips fips)

append using `rentcounts_all'

save `rentcounts_all', replace // mathes but small different to the rounding decimals 
*******************************************************
* 5. Number of owner occupied housing units in each census tract
*******************************************************
tempfile ownercounts_all
clear
save `ownercounts_all', emptyok replace

****1980****
************
import delimited "$path/Raw_Data/Social Explorer/OwnerCounts/1980/nhgis0063_ds107_1980_tract.csv", delimiter(",") clear
gen countyfips = statea*1000 + countya

gen tractchar = word(areaname, 6)
destring tractchar, replace force
gen tempstr = string(tractchar, "%04.2f")
replace tempstr = subinstr(tempstr, ".", "", .)
gen getTract = string(real(tempstr), "%06.0f")
drop tractchar tempstr
gen fips = string(countyfips) + getTract
replace countyfips = 12086 if countyfips == 12025

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge


egen counts_mortgage = rowtotal(dfr001-dfr012)
egen counts_wo_mortgage = rowtotal(dfr013-dfr020)

collapse (sum) counts_mortgage counts_wo_mortgage, by(year metro countyfips fips)

append using `ownercounts_all'
save `ownercounts_all', replace


****1990****
************
import delimited "$path/Raw_Data/Social Explorer/OwnerCounts/1990/nhgis0064_ds123_1990_tract.csv", delimiter(",") clear
gen countyfips = statea*1000 + countya

gen tractchar = word(anpsadpi, 2)
destring tractchar, replace force
gen tempstr = string(tractchar, "%04.2f")
replace tempstr = subinstr(tempstr, ".", "", .)
gen getTract = string(real(tempstr), "%06.0f")
drop tractchar tempstr
gen fips = string(countyfips) + getTract
replace countyfips = 12086 if countyfips == 12025

merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge

egen counts_mortgage = rowtotal(ey5001-ey5013)
egen counts_wo_mortgage = rowtotal(ey5014-ey5021)

collapse (sum) counts_mortgage counts_wo_mortgage, by(year metro countyfips fips)

append using `ownercounts_all'
save `ownercounts_all', replace


****2000****
************
import delimited "$path/Raw_Data/Social Explorer/OwnerCounts/2000/nhgis0068_ds151_2000_tract.csv", delimiter(",") clear
gen countyfips = statea*1000 + countya
gen getTract = string(tracta, "%06.0f")
gen fips = string(countyfips) + getTract
replace countyfips = 12086 if countyfips == 12025
merge m:1 countyfips using `crosswalk', keepusing(metro) 
keep if _merge == 3
drop _merge

egen counts_mortgage = rowtotal(gcz001-gcz010)
egen counts_wo_mortgage = rowtotal(gcz011-gcz020)

collapse (sum) counts_mortgage counts_wo_mortgage, by(year metro countyfips fips)

append using `ownercounts_all'
save `ownercounts_all', replace

****2010****
************
import delimited "$path/Raw_Data/Social Explorer/OwnerCounts/2010/nhgis0065_ds191_20125_tract.csv", delimiter(",") clear
gen countyfips = statea*1000 + countya

gen getTract = string(tracta, "%06.0f")
gen fips = string(countyfips) + getTract
replace countyfips = 12086 if countyfips == 12025
replace year = "2010"
destring year, replace
merge m:1 countyfips using `crosswalk', keepusing(metro)
keep if _merge == 3
drop _merge

gen counts_mortgage = q0fe002
gen counts_wo_mortgage = q0fe018

collapse (sum) counts_mortgage counts_wo_mortgage, by(year metro countyfips fips)

append using `ownercounts_all'
save `ownercounts_all', replace  // matches perfectly 

*******************************************************
* 6. Aggregate Gross Rent at the census tract level (1980 - 2010) 
*******************************************************

tempfile agg_rent_all
clear
save `agg_rent_all', emptyok replace

foreach yr of local years {
    di "Processing Gross Rent for year `yr'"
    
    local path = "$path/Raw_Data/Social Explorer/AggregateRent/`yr'/"
    
     if "`yr'"=="1980" {
         local csvfile = "`path'nhgis0069_ds107_1980_tract.csv"
    }
	else if "`yr'"=="1990" {
         local csvfile = "`path'nhgis0070_ds123_1990_tract.csv"
    }
    else if "`yr'"=="2000" {
         local csvfile = "`path'nhgis0071_ds151_2000_tract.csv"
    }
    else {
         local csvfile = "`path'nhgis0072_ds191_20125_tract.csv"
    }
    
    import delimited using "`csvfile'", delimiter(",") clear
    

    gen countyfips = statea*1000 + countya
    if `yr' == 1980 {
        gen tractchar = word(areaname, 6)
        destring tractchar, replace force
        gen tempstr = string(tractchar, "%04.2f")
        replace tempstr = subinstr(tempstr, ".", "", .)
        gen getTract = string(real(tempstr), "%06.0f")
        drop tractchar tempstr
    }
    else if `yr' == 1990 {
        gen tractchar = word(anpsadpi, 2)
        destring tractchar, replace force
        gen tempstr = string(tractchar, "%04.2f")
        replace tempstr = subinstr(tempstr, ".", "", .)
        gen getTract = string(real(tempstr), "%06.0f")
        drop tractchar tempstr
    }
    else {
        gen getTract = string(tracta, "%06.0f")
    }
    gen fips = string(countyfips) + getTract
    replace countyfips = 12086 if countyfips == 12025

    if `yr' == 1980 {
        rename dfl001 rent
    }
    else if `yr' == 1990 {
        rename eyv001 rent
    }
    else if `yr' == 2000 {
        rename gbp001 rent
    }
    else if `yr' == 2010 {
        rename qzue001 rent
		replace year = "2010"
		destring year, replace
    }
    
    merge m:1 countyfips using `crosswalk', keepusing(metro) 
	keep if _merge == 3
	drop _merge
    
    collapse (sum) agg_rent = rent, by(year metro countyfips fips)
    append using `agg_rent_all'
    save `agg_rent_all', replace
}
*******************************************************
* 7. Merge all
*******************************************************
use `agg_rent_all', clear
merge 1:1 countyfips year fips metro using `ownercounts_all'
drop _merge
merge 1:1 countyfips year fips metro using `rent_cost'
*keep if _merge == 3
drop _merge
merge 1:1 year metro countyfips fips using `rentcounts_all'
drop _merge
drop if agg_rent==.
gen ind_housing = 1
save "$output/housing_R.dta", replace




*** Extract the MSA population data for future use 
use "$output/education_stats", clear
collapse (sum) population, by(year metro)
save "$output/popweight.dta", replace 

** Merge the different cleaned files together: All of these will be at census tract level 
use "$output\base_data.dta", clear
merge 1:1 year metro countyfips fips bracket_no using "$output/children_data", keepusing(f_count_ct_kids f_count_ct_wokids ind_children)
drop _merge 
merge m:1 year metro countyfips fips using "$output/education_stats"
drop _merge
merge m:1 year metro countyfips fips using "$output/families_mean_income", keepusing(mean_kids_income mean_no_kids_income ind_kids_income)
drop _merge
merge m:1 year metro countyfips fips using "$output/housing_R.dta",  keepusing( agg_rent cash_units median_rent median_costs_mortgage counts_mortgage ind_housing)
drop _merge
merge m:1 countyfips metro using  "$path/Raw_Data/2003_crosswalk_all_id_bc.dta"
drop _merge
sort year metro countyfips fips bracket_no

order year metro metro_str countyfips county_str fips bracket_no low high f_count_ct f_count_ct_kids  f_count_ct_wokids population  college total   income mean_income median_income  mean_kids_income mean_no_kids_income agg_rent median_rent cash_units counts_mortgage  median_costs_mortgage 

**** Label the variables 
label variable year "Census Year"
label variable metro "Metro Code"
label variable metro_str "Metro Name"
label variable countyfips "County Identifier"
label variable county_str "County Name"
label variable fips "Census Tract Identifier"
label variable bracket_no "Income Bracker Number"
label variable low "Lower limit of income bracket"
label variable high "Upper limit of income bracket"
label variable f_count_ct "Count of families"
label variable f_count_ct_kids "Count of families with kids"
label variable f_count_ct_wokids "Count of families without kids"
label variable population "Population, Census Tract"
label variable college "College Population, Census Tract"
label variable total "Total Population, Census Tract"
label variable income "Mean income of low and high brackets"
label variable mean_income "Mean family income, census tract"
label variable median_income "Median family income, census tract"
label variable mean_kids_income "Mean family with kids income, census tract"
label variable mean_no_kids_income "Mean family without kids income, census tract"
label variable agg_rent "Aggregate Gross Rents, Census Tract" 
label variable median_rent "Median Rent, Census Tract"
label variable cash_units "Number of units under Rent"
label variable median_costs_mortgage "Median Mortgage Cost "
label variable counts_mortgage "Number of units under Mortgage"
label variable ind_base "Indicator  for all family data"
label variable ind_children "Indicator for families with kids data "
label variable ind_edu "Indicator for education data"
label variable ind_kids_income "Indicator for kids income data"
label variable ind_housing "Indicator for housing data"


save "$output/clean_census_tract.dta", replace 







** remove all the intermediate files 
rm "$output\base_data.dta"
rm "$output/children_data.dta"
rm  "$output/education_stats.dta"
rm  "$output/families_mean_income.dta"
rm "$output/housing_R.dta"



/** Checking the individual files 
use "$output/clean_census_tract.dta", clear  
 
 
use  "$output\base_data.dta", clear
sort year metro countyfips fips bracket_no
use "$output/children_data", clear

use  "$output/education_stats", clear
use "$output/families_mean_income", clear 
use "$output/housing_R", clear 


** to get back the base data we select the following variables 
year metro countyfips fips median_income population households college house_value bracket_no f_count_ct low high income mean_income 

** to get back the children data 
year metro countyfips fips 


** to get back the 


*/













